# load("xxx.RData")



### PROJECT LEVEL VARIABLES
  
# A) Production Function Category

# Discretized production inputs:
# X_I is whatever where() returns for the fitted object `prodfn`; it is later
# turned into a factor and crossed with the cost category.
# NOTE(review): neither `prodfn` nor where() is defined in this file --
# presumably `prodfn` is a fitted model restored from the workspace and where()
# comes from the package that fitted it. Confirm, and make sure it is not
# masked by another attached package that also exports a where().
# Assumes one value per row of `articles` -- TODO confirm.

X_I <- where(prodfn)

# Predicted impact:
# Y is whatever Predict() returns for the same `prodfn` object.
# NOTE(review): Predict() (capital P) is not defined in this file either;
# verify which package provides it. Assumes one prediction per row of
# `articles` -- TODO confirm.

Y <- Predict(prodfn)

###

# B) Project Level Costs

# The columns are read explicitly from `articles` rather than via
# attach()/detach(): attached columns are searched *after* the global
# environment, so a leftover workspace object with the same name would silently
# be used instead of the data, and an error mid-way would leave `articles`
# attached. `[[` is used for exact (non-partial) name matching.

# Fail fast if a required column is missing:

stopifnot(
  all(c("num_authors", "skill_diff", "any_gen50") %in% names(articles))
)

# Censor number of authors at 4:

num_authors_trunc <- pmin(articles[["num_authors"]], 4)

# Combine with indicators for skill differences and generalists on the team.
# `:` applied to factors builds their interaction; level labels are the
# component levels joined by ":" in the order authors, skill_diff, any_gen50:

X_C <- as.factor(num_authors_trunc):
  as.factor(articles[["skill_diff"]]):
  as.factor(articles[["any_gen50"]])

# Check:

nlevels(X_C)
levels(X_C)

# Remove irrelevant cost types (generalists w/out skill differences):
# every level ending in ":FALSE:TRUE" is folded into the level with the same
# author count ending in ":FALSE:FALSE". Assigning duplicated names through
# `levels<-` merges the affected levels in one step, so this covers whatever
# author counts are present instead of a hard-coded list of 1 to 4, and it
# cannot produce NAs when the target level does not exist yet.

levels(X_C) <- sub(":FALSE:TRUE$", ":FALSE:FALSE", levels(X_C))

# Drop empty levels:

X_C <- droplevels(X_C)

###

# C) Project Category

# Cross the cost category with the production category; `:` on two factors
# yields one level per pair of input levels, whether or not it is observed:

X_P <- X_C:as.factor(X_I)

# Number of categories before pruning:

length(levels(X_P))

# Keep only the combinations that actually occur:

X_P <- droplevels(X_P)

# Final number of categories (cost, then project):

length(levels(X_C))
length(levels(X_P))

# Label both outputs with the row names of `articles`:

X_P <- setNames(X_P, rownames(articles))
Y <- setNames(Y, rownames(articles))
